###############################################################################################################
#Replication Code for "Who Wants to Be a Suicide Bomber? Evidence from Islamic State Recruits"
# Andrea Michelle Morris

library("ggplot2")
library("stargazer")
library("survival")

dat <- read.csv("repDatISQ.csv")

# age squared
dat$age2<- dat$age*dat$age

# age graph
ggplot(dat, aes(age) ) +
  geom_histogram(binwidth = 0.5) + labs(x="Age", y="Count") + expand_limits(x = 0, y = 0)
ggsave("Figure2.tiff", units="in", width=5, height=4, dpi=300, compression = 'lzw')
dev.off()

################################ main table
glm <- glm(binRole ~ religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,family = binomial(link = "logit"),data = dat)
clogit1 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,data = dat)
clogit2 <- clogit(binRole ~ strata(countryOrigin) + strata(entryNA) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,data = dat)
clogit3 <- clogit(binRole ~ strata(countryOrigin) + strata(entryNA) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad + donation,data = dat)
stargazer(glm,clogit1,clogit2,clogit3,omit=c("countryOrigin","entryNA"))

############## appendix

# cateorical variables
# condense no education and primary school into one category (b/c separation)
dat$educ2 <- ifelse(dat$educ==1,2,dat$educ)
dat$educ2 <- dat$educ2-1
dat$religous2 <- as.factor(dat$religous)
is.factor(dat$religous2)
dat$educ3 <- as.factor(dat$educ2)
dat$work2 <- as.factor(dat$work)
dat$donation2 <- as.factor(dat$donation)

#### table 1
glmApp <- glm(binRole ~ religous2 + work2 + educ3 + maritalStatus + kids + age + age2 + prevJihad,family = binomial(link = "logit"),data = dat)
clogit1App <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + maritalStatus + kids + age + age2 + prevJihad,data = dat)
clogit2App <- clogit(binRole ~ strata(countryOrigin) + strata(entryNA) + religous2 + work2 + educ3 + maritalStatus + kids + age + age2 + prevJihad,data = dat)
clogit3App <- clogit(binRole ~ strata(countryOrigin) + strata(entryNA) + religous2 + work2 + educ3 + maritalStatus + kids + age + age2 + prevJihad + donation2,data = dat)
stargazer(glmApp,clogit1App,clogit2App,clogit3App,omit=c("countryOrigin","entryNA"))

##### linear probability models: table 2
lm <- lm(binRole ~ religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,data = dat)
git1 <- lm(binRole ~ factor(countryOrigin) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,data = dat)
git2 <- lm(binRole ~ factor(countryOrigin) + factor(entryNA) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad,data = dat)
git3 <- lm(binRole ~ factor(countryOrigin) + factor(entryNA) + religous + work + educ + maritalStatus + kids + age + age2 + prevJihad + donation,data = dat)
stargazer(lm,git1,git2,git3,omit=c("countryOrigin","entryNA"))

### T Tests
t.test(religous~binRole,data=dat)
t.test(educ~binRole,data=dat)
t.test(work~binRole,data=dat)
t.test(kids~binRole,data=dat)
t.test(maritalStatus~binRole,data=dat)
t.test(prevJihad~binRole,data=dat)
t.test(age~binRole,data=dat)
t.test(donation~binRole,data=dat)

# just main countries - table 4
mainCount <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2, 
                     data = subset(dat,dat$countryOrigin==40 | dat$countryOrigin==10 | dat$countryOrigin==7 |
                                     dat$countryOrigin==11 | dat$countryOrigin==2))
sa <- glm(binRole ~ religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
          family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==40))
tun <- glm(binRole ~ religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==10))
mor <- glm(binRole ~ religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==7))
egy <- glm(binRole ~ religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==2))
turkey <- glm(binRole ~ religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
              family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==11))
stargazer(mainCount,sa,tun,mor,egy,turkey)

# just main countries - table 5 - categorical
mainCount2 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2, 
                     data = subset(dat,dat$countryOrigin==40 | dat$countryOrigin==10 | dat$countryOrigin==7 |
                                     dat$countryOrigin==11 | dat$countryOrigin==2))
sa1 <- glm(binRole ~ religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
          family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==40))
tun1 <- glm(binRole ~ religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==10))
mor1 <- glm(binRole ~ religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==7))
egy1 <- glm(binRole ~ religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
           family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==2))
turkey1 <- glm(binRole ~ religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
              family = binomial(link = "logit"),data = subset(dat,dat$countryOrigin==11))
stargazer(mainCount2,sa1,tun1,mor1,egy1,turkey1)

# list wise deletion of main countries - table 6
sa2 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
             data = subset(dat,dat$countryOrigin!=40))
tun2 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=10))
mor2 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=7))
egy2 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=2))
turkey2 <- clogit(binRole ~ strata(countryOrigin) + religous + work + educ + prevJihad + maritalStatus + kids + age + age2,
                 data = subset(dat,dat$countryOrigin!=11))
stargazer(sa2,tun2,mor2,egy2,turkey2)

# list wise deletion of main countries - categorical - table 7
sa3 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
             data = subset(dat,dat$countryOrigin!=40))
tun3 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=10))
mor3 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=7))
egy3 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
              data = subset(dat,dat$countryOrigin!=2))
turkey3 <- clogit(binRole ~ strata(countryOrigin) + religous2 + work2 + educ3 + prevJihad + maritalStatus + kids + age + age2,
                 data = subset(dat,dat$countryOrigin!=11))
stargazer(sa3,tun3,mor3,egy3,turkey3)

# missing data map
library("Amelia")
data <- cbind.data.frame(dat$binRole,dat$religous,dat$work,dat$educ,dat$prevJihad,dat$maritalStatus,dat$kids,dat$age,dat$countryOrigin,dat$donation)
names(data) <- c("Desired Role","Religious Knowledge","Occupation","Education","Jihad Previously","Marital Status","Children","Age","Country","Donation")
missmap(data,legend=TRUE,col=c("black","grey"))

##################################################################################################
################ predicted probaility plots

# remove NAs for ggplot
data2 <- subset(dat, dat$religous >= 1)
data2 <- subset(data2, data2$binRole >=0)
data2 <- subset(data2, data2$work >=0)
data2 <- subset(data2, data2$educ >=0)
data2 <- subset(data2, data2$maritalStatus >=0)
data2 <- subset(data2, data2$prevJihad >=0)

logit2 <- glm(binRole ~ religous + work + educ + maritalStatus + kids + age + age2 + prevJihad, family = binomial(link = "logit"),data = data2)

# predicted probability plot religion
newdata <- with(data2,
                data.frame(religous = seq(1, 3, 0.25), work = median(work), educ = median(educ), maritalStatus = median(maritalStatus),
                           kids=median(kids), age=median(age),age2=median(age2), prevJihad=median(prevJihad)))

newdata$relP <- predict(logit2, newdata = newdata, type = "response")

# Predict the fitted values given the model and hypothetical data
predicted.data <- as.data.frame(predict(logit2, newdata = newdata, type="link", se=TRUE))

# Combine the hypothetical data and predicted values
new.data <- cbind(newdata, predicted.data)

# Calculate confidence intervals
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- logit2$family$linkinv(new.data$fit - std * new.data$se)
new.data$ymax <- logit2$family$linkinv(new.data$fit + std * new.data$se)
new.data$fit <- logit2$family$linkinv(new.data$fit)  # Rescale to 0-1

# Plot everything
p <- ggplot(data2, aes(x=religous, y=binRole)) 
p + 
  geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.2) + 
  geom_line(data=new.data, aes(y=fit)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x="Knowledge of Islam", y="Probability of Suicide Attacker") + scale_x_continuous(breaks=c(1.0,1.5,2.0,2.5,3.0),label =c("Basic", " ", "Average"," ","Advanced"))

ggsave("Figure4.tiff", units="in", width=5, height=4, dpi=300, compression = 'lzw')
dev.off()

####### predicted probability plot education
newdata <- with(data2,
                data.frame(religous = median(religous), work = median(work), educ = seq(1, 4, 0.25), maritalStatus = median(maritalStatus),
                           kids=median(kids), age=median(age),age2=median(age2), prevJihad=median(prevJihad)))

newdata$educP <- predict(logit2, newdata = newdata, type = "response")

# Predict the fitted values given the model and hypothetical data
predicted.data <- as.data.frame(predict(logit2, newdata = newdata, 
                                        type="link", se=TRUE))

# Combine the hypothetical data and predicted values
new.data <- cbind(newdata, predicted.data)

# Calculate confidence intervals
std <- qnorm(0.95 / 2 + 0.5)
new.data$ymin <- logit2$family$linkinv(new.data$fit - std * new.data$se)
new.data$ymax <- logit2$family$linkinv(new.data$fit + std * new.data$se)
new.data$fit <- logit2$family$linkinv(new.data$fit)  # Rescale to 0-1

# Plot everything
p <- ggplot(data2, aes(x=educ, y=binRole)) 
p + 
  geom_ribbon(data=new.data, aes(y=fit, ymin=ymin, ymax=ymax), alpha=0.25) + 
  geom_line(data=new.data, aes(y=fit)) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x="Education", y="Probability of Suicide Attacker") + scale_x_continuous(breaks=c(1,2,3,4),label =c("None", "Primary", "Secondary","University"))

ggsave("Figure5.tiff", units="in", width=5, height=4, dpi=300, compression = 'lzw')
dev.off()

##### predicted probability plot donation and varying value of work
# holding other varaibles at median

# remove 21 NAs for donation
data3 <- subset(data2, data2$donation >=0)
model <- glm(binRole ~ religous + work + educ2 + prevJihad + maritalStatus + kids + age + age2 + donation2,family = binomial(link = "logit"),data = data3)

newdata2 <- with(data3, data.frame(religous = median(religous), work = rep(seq(from = 1, to = 5, length.out = 100),4), educ2 = median(educ2),prevJihad = median(prevJihad),
                                    maritalStatus = median(maritalStatus),kids = median(kids), age = median(age), age2 = median(age2),
                                    donation2 = factor(rep(0:3, each = 100))))
newdata2$rankP <- predict(model, newdata = newdata2, type = "response")

newdata3 <- cbind(newdata2, predict(model, newdata = newdata2, type = "link",se = TRUE))

newdata3 <- within(newdata3, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))})

ggplot(newdata3, aes(x = work, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
                                                                     ymax = UL, fill = donation2), alpha = 0.2) + geom_line(aes(colour = donation2,linetype=donation2),
                                                                                                                            size = 1) + labs(x="Occupation", y="Probability of Suicide Attacker") + 
  scale_x_continuous(breaks=c(1,2,3,4,5),label =c("Unemployed", "Unskilled", "Student","Skilled","High Skill")) +
  scale_fill_discrete(name="Donation",breaks=c(0,1,2,3),labels=c("None", "Passport", "Passport & Phone","More")) +
  scale_colour_discrete(name="Donation",breaks=c(0,1,2,3),labels=c("None", "Passport", "Passport & Phone","More")) +
  scale_linetype_discrete(name="Donation",breaks=c(0,1,2,3),labels=c("None", "Passport", "Passport & Phone","More"))

ggsave("Figure6.tiff", units="in", width=5, height=4, dpi=300, compression = 'lzw')
dev.off()

##### predicted probability plot religion - categorical
# rel varying education pred probs

# use education as 3 categories here
mod1 <- glm(binRole ~ religous2 + work + educ2 + prevJihad + maritalStatus + kids + age + age2,family = binomial(link = "logit"),data = data2)

newdata2 <- with(data2, data.frame(religous2 = factor(rep(1:3, each = 100)), work = median(work), educ2 = rep(seq(from = 1, to = 3, length.out = 100),3),
                                    prevJihad = median(prevJihad),maritalStatus = median(maritalStatus),kids = median(kids), age = median(age), age2 = median(age2)))
newdata2$rankP <- predict(mod1, newdata = newdata2, type = "response")

newdata3 <- cbind(newdata2, predict(mod1, newdata = newdata2, type = "link",se = TRUE))

newdata3 <- within(newdata3, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))})

ggplot(newdata3, aes(x = educ2, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
                                                                      ymax = UL, fill = religous2), alpha = 0.2) + geom_line(aes(colour = religous2,linetype=religous2),
                                                                                                                             size = 1) + labs(x="Education", y="Probability of Suicide Attacker") + 
  scale_x_continuous(breaks=c(1,2,3),label =c("Primary", "Secondary", "University")) +
  scale_fill_discrete(name="Religious Knowledge",breaks=c(1,2,3),labels=c("Novice", "Intermediate", "Advanced")) +
  scale_colour_discrete(name="Religious Knowledge",breaks=c(1,2,3),labels=c("Novice", "Intermediate", "Advanced")) +
  scale_linetype_discrete(name="Religious Knowledge",breaks=c(1,2,3),labels=c("Novice", "Intermediate", "Advanced"))

##### predicted probability plot education - categorical
# education varying with work
mod <- glm(binRole ~ religous + work + educ3 + prevJihad + maritalStatus + kids + age + age2,family = binomial(link = "logit"),data = data2)

newdata2 <- with(data2, data.frame(religous = median(religous), work = rep(seq(from = 1, to = 5, length.out = 100),3), educ3 =  factor(rep(1:3, each = 100)),
                                    prevJihad = median(prevJihad),maritalStatus = median(maritalStatus),kids = median(kids), age = median(age), age2 = median(age2)))
newdata2$rankP <- predict(mod, newdata = newdata2, type = "response")

newdata3 <- cbind(newdata2, predict(mod, newdata = newdata2, type = "link",se = TRUE))

newdata3 <- within(newdata3, {
  PredictedProb <- plogis(fit)
  LL <- plogis(fit - (1.96 * se.fit))
  UL <- plogis(fit + (1.96 * se.fit))})

ggplot(newdata3, aes(x = work, y = PredictedProb)) + geom_ribbon(aes(ymin = LL,
                                                                     ymax = UL, fill = educ3), alpha = 0.2) + geom_line(aes(colour = educ3,linetype=educ3),
                                                                                                                          size = 1) + labs(x="Occupation", y="Probability of Suicide Attacker") + 
  scale_x_continuous(breaks=c(1,2,3,4,5),labels=c("Unemployed", "Unskilled", "Student","Skilled","High Skill")) +
  scale_fill_discrete(name="Education",breaks=c(1,2,3),label =c("Primary", "Secondary", "University")) +
  scale_colour_discrete(name="Education",breaks=c(1,2,3),label =c("Primary", "Secondary", "University")) +
  scale_linetype_discrete(name="Education",breaks=c(1,2,3),label =c("Primary", "Secondary", "University"))

#############################################################################################################
# Figure 2 - World Map
library("RColorBrewer")
library("maptools")
library("rworldmap")
library("classInt")

dev.off()
map <- read.csv("formap1.csv")

data(countryExData)
classInt <- classIntervals( map[["isisRecruits"]],n=9, style = "jenks")
catMethod = classInt[["brks"]]
map$isisRecruits[is.na(map$isisRecruits)] <- 0
sPDF <- joinCountryData2Map( map
                             ,joinCode = "ISO3"
                             ,nameJoinColumn = "ISO3V10")
mapDevice() #create world map shaped window
mapCountryData(sPDF,nameColumnToPlot='isisRecruits',catMethod = catMethod,colourPalette = brewer.pal(9,'RdPu'),mapTitle = 'Country Origin')


do.call(addMapLegend
        ,c(mapCountryData(sPDF,nameColumnToPlot='isisRecruits',addLegend = FALSE, catMethod = catMethod,
                          colourPalette = brewer.pal(9,'Reds'),borderCol = "black",oceanCol="white",mapTitle=' ',missingCountryCol = "white",lwd=0.9)
           ,legendLabels="all"
           ,legendWidth=0.5
           ,legendIntervals="data"
           ,legendMar = 6))

########################################################################################################################